##### SET-UP #####

### load libraries
library(biglm); library(reshape2); library(data.table); library(lme4)
library(arm); library(ggplot2); library(patchwork)

### set path
setwd("/path/to/this/replication/folder")

### set seed
set.seed(2365256)

### set lmer control
my_control <- lmerControl(optimizer = "nloptwrap", calc.derivs = FALSE,
                          check.rankX = c("silent.drop.cols"), 
                          optCtrl = list(maxfun = 25000))

##### DESCRIPTIVE STATISTICS -- INTRODUCTION #####

### read in data
elite <- read.csv("elite-merged.csv", stringsAsFactors = F)
mass <- read.csv("mass-merged.csv", stringsAsFactors = F)
df.final <- read.csv("final-many-to-many.csv", stringsAsFactors = F)

### summarize data as a whole
# number of elite respondents in total
nrow(elite) 
# number of citizen respondents in total
nrow(mass) 
# number of country-years
nrow(df.final) 
# number of countries
length(unique(df.final$country)) 
# number of years
length(unique(df.final$year)) 
# NB: number of unique surveys is counted manually from the survey documentation

##### DESCRIPTIVE STATISTICS -- 'Data Around the World' SECTION #####

### footnote 7, the design effect
# read in data - takes 5-10 minutes
dyads <- fread("final-dyads.csv", showProgress = T, stringsAsFactors = T)
# multiply through by the weights
w <- dyads$m_w*dyads$e_w
# invert the weights
w_i <- 1/w
# store the length of the vector
n <- length(w_i)
# compute design effect, using the equation supplied in PracTools:::deffK()
kish_deff <- 1 + sum((w_i - mean(w_i))^2)/n/mean(w_i)^2
kish_deff

### footnote 10, proportion of missing income information
mm <- split(mass, f = list(mass$country, mass$year))
mm <- mm[lapply(mm, nrow) > 0]
mm <- lapply(mm, function(x) (nrow(x[which(is.na(x$income)),])/nrow(x)))
mm <- data.frame(cy = names(mm), prop = unlist(mm))
mean(mm$prop)

### footnote 11, breakdown of affluence variables used
summary(as.factor(df.final$mclass_var))

### footnote 12, years appearing in the data
sort(unique(df.final$year))

##### DESCRIPTIVE STATISTICS -- 'Measuring Congruence' SECTION #####

### number of dyadic observations
nrow(dyads)

### bootstrap global parameters
n_samp <- 250
samp_size <- 50000

### clean up before estimation
rm(kish_deff, elite, mm, mass, n, w, w_i)

##### FIGURE 1 #####

### Model 1: MELR model
# change reference category to the richest
dyads$class <- relevel(as.factor(dyads$class), ref = "4")
# multiply through the weights
dyads$comb_weight <- dyads$m_w*dyads$e_w
# run the model -- takes 5 minutes -- NB: weights need to be formula, hence =~ 
mod1 <- biglm(emd ~ class + as.factor(m_ideology_scale_orig) + 
                   as.factor(e_ideology_scale_orig) + as.factor(scalediff), 
                 data = dyads, weights =~dyads$comb_weight)
summary(mod1)

### Model 2: Bootstrapping -- takes approximately 2 hours
# create a holding data frame
est <- data.frame(matrix(ncol = 5, nrow = n_samp))
# loop over bootstrap iterations
pb <- txtProgressBar(min = 0, max = n_samp, style = 3)
for(i in 1:n_samp){
  
  # sample data efficiently
  dat <- dyads[sample(.N, samp_size)]
  
  # run the regression
  mod <- lmer(emd ~ as.factor(class) + as.factor(m_ideology_scale_orig) + 
                as.factor(e_ideology_scale_orig) + as.factor(scalediff) +
                (1 | m_mass_id) + (1 | e_elite_id), weights = comb_weight, 
              data = dat, control = my_control)
  
  # save the output
  est[i,] <- unname(fixef(mod)[c(1:5)])
  
  # keep track
  setTxtProgressBar(pb, i)
}
# print estimates
apply(est[,-1], 2, mean)
# standard deviations
sqrt(apply(est[,-1], 2, var))
# 95% confidence intervals
apply(est[,-1], 2, quantile, c(.025,.975))

### Model 3: EMD approach
# subset to where we have at least 30 observations
df <- df.final[which(df.final$nobs_elite > 30),]
# reshape data to be country-year-class observations
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent",
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
           variable.name = "emd")
# fix class variable
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
# fix emd variable
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
# create variable for different scales
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
# make the rich the baseline category
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
# estimate the model
mod3 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod3)
nobs(mod3)

### Get results into shape for plotting
# predict out of the models
pl <- rbind(as.data.frame(unname(confint(mod1))[c(1:5),]),
            as.data.frame(unname(t(apply(est, 2, quantile, c(.025, .975))))),
            as.data.frame(unname(confint(mod3))[c(1:5),]))
names(pl) <- c("lo", "hi")
pl$mean <- c(unname(coef(mod1)[c(1:5)]), 
             unname(apply(est, 2, mean)),
             unname(coef(mod3)[c(1:5)]))
# name variables and models
pl$var <- rep(c("quant4", "quant0", "quant1", "quant2", "quant3"), 3)
pl$mod <- c(rep("mod1", 5), rep("mod2", 5), rep("mod3", 5))
# drop the rich/intercept
pl <- pl[which(pl$var != "quant4"),]
row.names(pl) <- NULL
# add x locations and statistical significance
pl$loc <- as.numeric(gsub("[A-Z,a-z]", "", pl$var)) + .85
pl$loc <- ifelse(pl$mod == "mod2", pl$loc + .15, pl$loc)
pl$loc <- ifelse(pl$mod == "mod3", pl$loc + .30, pl$loc)
pl$cov <- ifelse(pl$lo < 0 & pl$hi > 0, 0, 1)
# ensure numeric variables are numeric
for(i in c(1:3,6:7)){
  pl[,i] <- as.numeric(pl[,i])
}

### finally, make the plot
# make the plot
fig1 <- ggplot(pl) +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black"),
        panel.border = element_rect(fill = NA, size = 1, linetype = "solid"),
        text = element_text(family = "Helvetica")) +
  geom_hline(yintercept = 0, linetype = "longdash", color = "darkgray") +
  geom_linerange(aes(x = loc, ymin = lo, ymax = hi)) +
  geom_point(shape = 21, aes(x = loc, y = mean, fill = factor(cov)), 
             show.legend = F) +
  scale_fill_manual(values = c("white", "black")) +
  labs(title = NULL,
       y = paste("Predicted difference from affluent in absolute",
                 "left-right distance to legislators", sep = "\n"),
       x = "Affluence percentile") +
  scale_x_continuous(breaks = seq(1:4), 
                     labels = c("0-20th", "20-40th", "40-60th", "60-80th"))

# save it
postscript("fg1.eps", width = 5, height = 4, colormodel = "cymk")
fig1
dev.off()

##### OTHER RESULTS -- 'Is There An Affluence Bias?' SECTION #####

### compute main effect size -- in text
# establish a baseline, the mean EMD among the rich
baseline <- as.vector(summary(df.final$emd_moreaffluent)[3])
# the magnitude as a proportion of this baseline - 116%, so effect size of 16%
(baseline + unname(coef(mod3)[2]))/baseline

### footnote 20, difference in means effect size
diff_means <- t.test(abs(df.final$mmean_poor - df.final$emean),
                     abs(df.final$mmean_rich - df.final$emean))
diff_means
# the magnitude as a proportion of the baseline -- 112%, so effect size of 12%
(baseline + (unname(diff_means$estimate[1]) - 
               unname(diff_means$estimate[2])))/baseline

### robustness to case where poor and rich diverge -- in text
# subset to where we have at least 30 observations
df <- df.final[which(df.final$nobs_elite > 30),]
df$diff <- abs(df$mmean_poor - df$mmean_rich)
df <- df[which(df$diff > unname(summary(df$diff)[5])),]
# create a baseline for the effect size calculation below
baseline <- as.vector(summary(df$emd_moreaffluent)[3])
# reshape data to be country-year-class observations
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent",
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
           variable.name = "emd")
# fix class variable
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
# fix emd variable
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
# create variable for different scales
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
# make the rich the baseline category
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
# estimate the model
mod4 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod4)
nobs(mod4)
# effect size - poor are 131% of the distance the rich are, so 31%
(baseline + unname(coef(mod4)[2]))/baseline

# clean up
rm(list=ls()[!(ls() %in% c("df.final", "my_control"))])

##### FIGURE 2 #####

### get data into shape for plotting
pl <- df.final
# subset to 1995 and later for visual clarity; drop unnecessary variables
pl <- pl[which(pl$year > 1994),
         c("country", "region", "year", 
           "emd_lessaffluent_full", "emd_moreaffluent_full")]
# create a difference variable
pl$y <- pl$emd_lessaffluent - pl$emd_moreaffluent
# remove unneeded columns
pl$emd_lessaffluent_full <- pl$emd_moreaffluent_full <- NULL
pl <- pl[order(pl$region, pl$country),]
# separate regions with a blank space
pl$loc <- as.numeric(factor(pl$country, levels = rev(unique(pl$country))))
pl$loc[which(pl$region == "Europe")] <- 
  pl$loc[which(pl$region == "Europe")] + 1
pl$loc[which(pl$region == "Australasia")] <- 
  pl$loc[which(pl$region == "Australasia")] + 2
# create breaks for axis ticks
breaks <- c(-max(pl$y), 0, max(pl$y))

# make the plot
fig2 <- ggplot(pl, aes(x = year, y = loc)) + 
  theme_bw() +
  theme(panel.grid = element_blank(), 
        panel.border = element_rect(fill = NA, size = 1,linetype = "solid"),
        axis.text = element_text(color = "black"),
        plot.background = element_blank(),
        panel.spacing = unit(c(0,0,0,0), "mm"),
        plot.margin = unit(c(0,3,1,0), "mm"),
        panel.background = element_blank(),
        legend.position = "bottom",
        text = element_text(family = "Helvetica")) +
  geom_tile(aes(fill = y), color = "black") +
  scale_y_continuous(breaks = unique(pl$loc), 
                     labels = unique(pl$country)) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", 
                       midpoint = 0, breaks = breaks, 
                       limits = c(-max(pl$y), max(pl$y)),
                       labels = c("-0.40", "0.00", "0.40")) +
  labs(title = NULL, y = NULL, x = "Year") +
  coord_cartesian(xlim = c(1994.5, 2015.5), expand = 0) +
  guides(fill = guide_colorbar(barwidth = 10, barheight = 1, ticks = FALSE,
                               limits = c(-max(pl$y), max(pl$y)),
                               title = "Affluence bias", title.position = "top", 
                               title.hjust = 0.5))

# save it
postscript("fg2.eps", width = 6.5, height = 8, colormodel = "cymk")
fig2
dev.off()

### clean up
rm(list=ls()[!(ls() %in% c("df.final", "my_control"))])

##### DESCRIPTIVE STATISTICS -- 'Beyond Left and Right' SECTION -- LAT.AM. #####

### read in the Latin America data -- takes a few minutes
dyads <- read.csv("latam-dyads.csv", stringsAsFactors = F)

### footnote 21: unique years in the Latin America data
sort(unique(dyads$m_year))

##### FIGURE 3 #####

### estimate the models -- each takes ~5-10 minutes
# make the rich the baseline category
dyads$m_wealth <- factor(dyads$m_wealth, levels = c("4", "0", "1", "2", "3"),
                         labels = c("4", "0", "1", "2", "3"))
# ideology model
mod1 <- lmer(emd_ideology ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = comb_weight_ideology, data = dyads, 
             control = my_control)
summary(mod1)
# economic policy model
mod2 <- lmer(emd_econ ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = comb_weight_econ, data = dyads, 
             control = my_control)
summary(mod2)
# same-sex marriage model
mod3 <- lmer(emd_samesex ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = comb_weight_samesex, data = dyads, 
             control = my_control)
summary(mod3)

### get data into shape for plotting
# simulate: would be *much* better to bootstrap but computationally not feasible
ses <- c(summary(mod1)$coefficients[,2],
         summary(mod2)$coefficients[,2],
         summary(mod3)$coefficients[,2])
means <- c(unname(fixef(mod1)[c(1:5)]), 
           unname(fixef(mod2)[c(1:5)]), 
           unname(fixef(mod3)[c(1:5)]))
pl <- data.frame(cbind(means, ses))
names(pl) <- c("mean", "se")
# create upper and lower bound variables
pl$lo <- pl$mean - 1.96*pl$se
pl$hi <- pl$mean + 1.96*pl$se
pl$se <- NULL
# name variables and models
pl$var <- rep(c("quant4", "quant0", "quant1", "quant2", "quant3"), 3)
pl$mod <- c(rep("mod1", 5), rep("mod2", 5), rep("mod3", 5))
# create nice names for plotting
pl$dv <- c(rep("Left-right placement", 5), 
           rep("Economic policy", 5), 
           rep("Same-sex marriage", 5))
# factor them so they appear in the correct order
pl$dv <- factor(pl$dv, levels = c("Left-right placement", "Economic policy",
                                  "Same-sex marriage"))
# drop the baseline category
pl <- pl[which(pl$var != "quant4"),]
# add x locations
pl$loc <- as.numeric(gsub("[A-Z,a-z]", "", pl$var)) + 1
# add statistical significance
pl$cov <- ifelse(pl$lo < 0 & pl$hi > 0, 0, 1)
for(i in c(1:3, 7:8)){
  pl[,i] <- as.numeric(pl[,i])
}

# make the plot
fig3 <- ggplot(pl) +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black"),
        panel.border = element_rect(fill = NA, size = 1, linetype = "solid"),
        panel.spacing.x = unit(1, "lines"),
        text = element_text(family = "Helvetica")) +
  geom_hline(yintercept = 0, linetype = "longdash", color = "darkgray") +
  facet_grid(. ~ dv) +
  geom_linerange(aes(x = loc, ymin = lo, ymax = hi)) +
  geom_point(shape = 21, aes(x = loc, y = mean, fill = factor(cov)), 
             show.legend = F) +
  scale_fill_manual(values = c("white", "black")) +
  labs(title = NULL,
       y = paste("Predicted difference from affluent in distance",
       "to legislators by issue-area", sep = "\n"),
       x = "Affluence percentile") +
  scale_x_continuous(breaks = seq(1:4), 
                     labels = c("0-20th", "20-40th", "40-60th", "60-80th"),
                     limits = c(.75, 4.25))

# save it
postscript("fg3.eps", width = 7, height = 4, colormodel = "cymk")
fig3
dev.off()

### effect sizes
# compute baselines
baseline_leftright <- as.vector(summary(df.final$emd_moreaffluent)[3])
baseline_econ <- as.vector(summary(df.final$emd_moreaffluent_econ)[3])
baseline_samesex <- as.vector(summary(df.final$emd_moreaffluent_samesex)[3])
# compute effect sizes - 11%, 7%, 37%
(baseline_leftright + fixef(mod1)[2])/baseline_leftright
(baseline_econ + fixef(mod2)[2])/baseline_econ
1 - (baseline_samesex + fixef(mod3)[2])/baseline_samesex # subtract since < 1

### clean up
rm(list=ls()[!(ls() %in% c("df.final", "my_control"))])

##### DESCRIPTIVE STATISTICS -- 'Beyond Left and Right' SECTION -- SWEDEN  #####

### read in data -- takes a minute
dyads <- read.csv("sweden-dyads.csv", stringsAsFactors = F)

### years in the data
sort(unique(dyads$m_year))

##### FIGURE 4 #####

### estimate the models -- each takes a few minutes
# make the rich the baseline category
dyads$m_occupation <- factor(dyads$m_occupation, 
                             levels = c("professional", "worker", "other"),
                             labels = c("professional", "worker", "other"))
# privatization of health care model
mod1 <- lmer(emd_health_private ~ as.factor(m_occupation) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = e_w_health_private, data = dyads,
             control = my_control)
summary(mod1)
# reduce the size of public sector model
mod2 <- lmer(emd_public_sector ~ as.factor(m_occupation) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = e_w_public_sector, data = dyads, 
             control = my_control)
summary(mod2)
# reduce inequality model
mod3 <- lmer(emd_reduce_inequality ~ as.factor(m_occupation) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = e_w_reduce_inequality, data = dyads, 
             control = my_control)
summary(mod3)
# apply to join nato model
mod4 <- lmer(emd_nato ~ as.factor(m_occupation) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = e_w_nato, data = dyads, 
             control = my_control)
summary(mod4)
# reduce the number of refugees model
mod5 <- lmer(emd_fewer_refugees ~ as.factor(m_occupation) +
               (1 | m_mass_id) + (1 | e_elite_id), data = dyads, 
             weights = e_w_fewer_refugees, 
             control = my_control)
summary(mod5)
# ban pornography model
mod6 <- lmer(emd_pornography ~ as.factor(m_occupation) +
               (1 | m_mass_id) + (1 | e_elite_id), 
             weights = e_w_pornography, data = dyads, 
             control = my_control)
summary(mod6)

### get into shape for plotting
# simulate: would be *much* better to bootstrap but computationally not feasible
ses <- c(summary(mod1)$coefficients[,2],
         summary(mod2)$coefficients[,2],
         summary(mod3)$coefficients[,2],
         summary(mod4)$coefficients[,2],
         summary(mod5)$coefficients[,2],
         summary(mod6)$coefficients[,2])
means <- c(unname(fixef(mod1)[c(1:3)]), 
           unname(fixef(mod2)[c(1:3)]), 
           unname(fixef(mod3)[c(1:3)]),
           unname(fixef(mod4)[c(1:3)]),
           unname(fixef(mod5)[c(1:3)]),
           unname(fixef(mod6)[c(1:3)]))
pl <- data.frame(cbind(means, ses))
names(pl) <- c("mean", "se")
# create upper and lower bound variables
pl$lo <- pl$mean - 1.96*pl$se
pl$hi <- pl$mean + 1.96*pl$se
pl$se <- NULL
# name variables and models
pl$var <- rep(c("professional", "worker", "other"), 6)
pl$mod <- c(rep("mod1", 3), rep("mod2", 3), rep("mod3", 3), rep("mod4", 3),
            rep("mod5", 3), rep("mod6", 3))
# create nice names for plotting
pl$dv <- c(rep("Privatization", 3),
           rep("Public sector", 3),
           rep("Inequality", 3),
           rep("NATO", 3),
           rep("Refugees", 3), 
           rep("Pornography", 3))
# factor them so they appear in the correct order
pl$dv <- factor(pl$dv, levels = c("Privatization", "Public sector", 
                                  "Inequality", "NATO", "Refugees", 
                                  "Pornography"))
# drop the baseline category
pl <- pl[which(pl$var != "professional"),]
# add x locations
pl$loc <- rep(1,nrow(pl))
pl$loc[which(pl$var == "other")] <- pl$loc[which(pl$var == "other")] + 1
# add statistical significance
pl$cov <- ifelse(pl$lo < 0 & pl$hi > 0, 0, 1)
for(i in c(1:3,7:8)){
  pl[,i] <- as.numeric(pl[,i])
}

# make the plot
fig4 <- ggplot(pl) +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black"),
        panel.border = element_rect(fill = NA, size = 1, linetype = "solid"),
        panel.spacing.x = unit(1, "lines"),
        text = element_text(family = "Helvetica")) +
  geom_hline(yintercept = 0, linetype = "longdash", color = "darkgray") +
  facet_grid(. ~ dv) +
  geom_linerange(aes(x = loc, ymin = lo, ymax = hi)) +
  geom_point(shape = 21, aes(x = loc, y = mean, fill = factor(cov)), 
             show.legend = F) +
  scale_fill_manual(values = c("white", "black")) +
  labs(title = NULL,
       y = paste("Predicted difference from affluent in",
                 "distance to legislators by issue-area", sep = "\n"),
       x = "Affluence percentile") +
  scale_x_continuous(breaks = seq(1:2), 
                     labels = c("Workers", "Other"),
                     limits = c(.75, 2.25))

# save it
postscript("fg4.eps", width = 7, height = 3, colormodel = "cymk")
fig4
dev.off()

### clean up
rm(list=ls()[!(ls() %in% c("df.final", "my_control"))])

##### DESCRIPTIVE STATISTICS -- 'Beyond Left and Right' SECTION -- AFRICA  #####

### read in data -- takes a minute
dyads <- read.csv("africa-dyads.csv", stringsAsFactors = F)

### descriptive statistics (footnote 22)
length(unique(dyads$m_mass_id))
length(unique(dyads$e_elite_id))
sort(unique(dyads$m_country))

##### FIGURE 5 #####

### estimate the models -- each takes a few minutes
# make the rich the baseline category
dyads$m_wealth <- factor(dyads$m_wealth, levels = c("4", "0", "1", "2", "3"),
                         labels = c("4", "0", "1", "2", "3"))
# poverty model
mod1 <- lmer(emd_poverty ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), data = dyads, 
             control = my_control)
summary(mod1)
# agriculture model
mod2 <- lmer(emd_agriculture ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), data = dyads, 
             control = my_control)
summary(mod2)
# social rights model
mod3 <- lmer(emd_social_rights ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), data = dyads, 
             control = my_control)
summary(mod3)
# violence model
mod4 <- lmer(emd_violence ~ as.factor(m_wealth) +
               (1 | m_mass_id) + (1 | e_elite_id), data = dyads, 
             control = my_control)
summary(mod4)

### get into shape for plotting
# simulate: would be *much* better to bootstrap but computationally not feasible
ses <- c(summary(mod1)$coefficients[,2],
         summary(mod2)$coefficients[,2],
         summary(mod3)$coefficients[,2],
         summary(mod4)$coefficients[,2])
means <- c(unname(fixef(mod1)[c(1:5)]), 
           unname(fixef(mod2)[c(1:5)]), 
           unname(fixef(mod3)[c(1:5)]),
           unname(fixef(mod4)[c(1:5)]))
pl <- data.frame(cbind(means, ses))
names(pl) <- c("mean", "se")
# create upper and lower bound variables
pl$lo <- pl$mean - 1.96*pl$se
pl$hi <- pl$mean + 1.96*pl$se
pl$se <- NULL
# name variables and models
pl$var <- rep(c("quant4", "quant0", "quant1", "quant2", "quant3"), 4)
pl$mod <- c(rep("mod1", 5), rep("mod2", 5), rep("mod3", 5), rep("mod4", 5))
# create nice names for plotting
pl$dv <- c(rep("Poverty", 5), 
           rep("Agriculture", 5), 
           rep("Social rights", 5),
           rep("Violence", 5))
# factor them so they appear in the correct order
pl$dv <- factor(pl$dv, levels = c("Agriculture", "Poverty", "Social rights",
                                  "Violence"))
# drop the baseline category
pl <- pl[which(pl$var != "quant4"),]
# add x locations
pl$loc <- as.numeric(gsub("[A-Z,a-z]", "", pl$var)) + 1
# add statistical significance
pl$cov <- ifelse(pl$lo < 0 & pl$hi > 0, 0, 1)
for(i in c(1:3,7:8)){
  pl[,i] <- as.numeric(pl[,i])
}

# make the plot
fig5 <- ggplot(pl) +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black"),
        panel.border = element_rect(fill = NA, size = 1, linetype = "solid"),
        panel.spacing.x = unit(1, "lines"),
        text = element_text(family = "Helvetica")) +
  geom_hline(yintercept = 0, linetype = "longdash", color = "darkgray") +
  facet_grid(. ~ dv) +
  geom_linerange(aes(x = loc, ymin = lo, ymax = hi)) +
  geom_point(shape = 21, aes(x = loc, y = mean, fill = factor(cov)),
             show.legend = F) +
  scale_fill_manual(values = c("white", "black")) +
  labs(title = NULL,
       y = paste("Predicted difference from affluent in", 
                 "distance to legislators by issue-area", sep = "\n"),
       x = "Affluence percentile") +
  scale_x_continuous(breaks = seq(1:4), 
                     labels = c(paste("0th-", "20th", sep = "\n"), 
                                paste("20th-", "40th", sep = "\n"), 
                                paste("40th-", "60th", sep = "\n"), 
                                paste("60th-", "80th", sep = "\n")),
                     limits = c(.75, 4.25))

# save it
postscript("fg5.eps", width = 7, height = 3, colormodel = "cymk")
fig5
dev.off()

### clean up
rm(list=ls()[!(ls() %in% c("df.final"))])

##### DIRECTION OF THE BIAS/FIGURE 6 #####
lapop <- read.csv("lapop-full.csv", stringsAsFactors = F)
pela <- read.csv("pela-full.csv", stringsAsFactors = F)

### create a holding dataframe
df <- data.frame(matrix(nrow = nrow(df.final), ncol = 11))
names(df) <- c("country", "year", "ideology_poor", "ideology_rich",
               "ideology_elite", "samesex_poor", "samesex_rich", 
               "samesex_elite", "econ_poor", "econ_rich", "econ_elite")

### store means by group on each issue dimension, for each country
pb <- txtProgressBar(min = 1, max = nrow(df.final), style = 3)
for(i in 1:nrow(df.final)){
  # select relevant data
  mdata <- lapop[which(lapop$country == df.final$country[i] & 
                         lapop$year == df.final$year[i]),]
  edata <- pela[which(pela$country == df.final$country[i] & 
                        pela$year == df.final$year[i]),]
  
  # skip if we don't have both mass and elite data
  if(nrow(edata[which(!is.na(edata$ideology_scaled)),]) == 0 |
     nrow(mdata[which(!is.na(mdata$ideology_scaled)),]) == 0){
    next
  } else {
    # create a temporary data frame of class measures, ranked by our preference
    class <- data.frame(cbind(mdata$country, mdata$wealth, mdata$income,
                              mdata$occupation, mdata$education))
    colnames(class) <- c("country", "wealth", "income", "occupation",
                         "education")
    
    # delete all NAs
    for(j in ncol(class):2){
      if(all(is.na(unique(class[,j]))) == T){
        class <- class[,-j]
      }
    }
    
    # make sure the loop uses our best class measure
    whichclass <- colnames(class)[2] # since 1 is country
    colnames(mdata)[grep(whichclass, names(mdata))] <- "class"
    
    # defensive programming checks
    if(!(all(is.na(edata$econ_weight)))){
      stopifnot(all.equal(sum(edata$econ_weight, na.rm=T), 1))
    }
    stopifnot(all.equal(sum(edata$ideology_weight, na.rm=T), 1))
    stopifnot(all.equal(sum(edata$samesex_weight, na.rm=T), 1))
    
    # save (weighted) means
    df$country[i] <- unique(mdata$country)
    df$year[i] <- unique(mdata$year)
    df$ideology_poor[i] <- 
      mean(mdata$ideology_scaled[which(mdata$class == "0")], na.rm=T)
    df$ideology_rich[i] <- 
      mean(mdata$ideology_scaled[which(mdata$class == "4")], na.rm=T)
    df$ideology_elite[i] <- 
      sum((edata$ideology_scaled*edata$ideology_weight), na.rm=T)
    df$samesex_poor[i] <- 
      mean(mdata$samesex_scaled[which(mdata$class == "0")], na.rm=T)
    df$samesex_rich[i] <- 
      mean(mdata$samesex_scaled[which(mdata$class == "4")], na.rm=T)
    df$samesex_elite[i] <- 
      sum((edata$samesex_scaled*edata$samesex_weight), na.rm=T)
    df$econ_poor[i] <- 
      mean(mdata$econ_scaled[which(mdata$class == "0")], na.rm=T)
    df$econ_rich[i] <- 
      mean(mdata$econ_scaled[which(mdata$class == "4")], na.rm=T)
    df$econ_elite[i] <- 
      sum((edata$econ_scaled*edata$econ_weight), na.rm=T)
  }
  
  # keep track of the loop
  setTxtProgressBar(pb, i)
}

### get the data into shape for plotting
# subset to relevant cases only
df <- df[which(!(is.na(df$country))),]
df[is.na(df)] <- NA
# compute differences for rescaling
df$ideology_poor_rich_diff <- df$ideology_rich - df$ideology_poor
df$ideology_poor_elite_diff <- df$ideology_elite - df$ideology_poor
df$samesex_poor_rich_diff <- df$samesex_rich - df$samesex_poor
df$samesex_poor_elite_diff <- df$samesex_elite - df$samesex_poor
df$econ_poor_rich_diff <- df$econ_rich - df$econ_poor
df$econ_poor_elite_diff <- df$econ_elite - df$econ_poor
# reshape the data
df <- aggregate(. ~ df$country, df[-c(1:2)], mean, na.rm = TRUE, 
                na.action = NULL)
names(df)[1] <- "country"
# reverse the order for plotting
df <- df[order(rev(df$country)),]
df$loc <- seq(1:nrow(df))

# make the plot -- first panel
fig6.1 <- ggplot(df, aes(y = loc)) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(color = "black"),
        panel.grid.major.y = element_line(colour = "grey80"),
        panel.grid.minor.y = element_blank(),
        text = element_text(family = "Helvetica")) +
  geom_point(shape = 15, aes(x = econ_poor_elite_diff)) +
  geom_point(aes(x = econ_poor_rich_diff)) +
  geom_vline(xintercept = 0) +
  scale_y_continuous(limits = c(1, nrow(df)),
                     breaks = seq(1, nrow(df), 1),
                     labels = df$country, expand = c(.03,.03)) +
  labs(x = "Mean economic position", y = NULL, title = NULL)

# make the plot -- second panel
fig6.2 <- ggplot(df, aes(y = loc)) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(color = "black"),
        panel.grid.major.y = element_line(colour = "grey80"),
        panel.grid.minor.y = element_blank(),
        text = element_text(family = "Helvetica")) +
  geom_point(shape = 15, aes(x = samesex_poor_elite_diff)) +
  geom_point(aes(x = samesex_poor_rich_diff)) +
  geom_vline(xintercept = 0) +
  scale_y_continuous(limits = c(1, nrow(df)),
                     breaks = seq(1, nrow(df), 1),
                     labels = df$country, expand = c(.03,.03)) +
  labs(x = "Mean same-sex marriage position", y = NULL, title = NULL)

# save it
fig6 <- wrap_plots(fig6.1, fig6.2, ncol = 2)
postscript("fg6.eps", width = 7, height = 4, colormodel = "cymk")
fig6
dev.off()

### clean up
rm(list=ls())

##### VERSION CONTROL #####
sessionInfo()
# R version 3.5.1 (2018-07-02)
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS High Sierra 10.13.6
# 
# Matrix products: default
# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
# 
# locale:
# [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
# [1] patchwork_1.0.1   ggplot2_3.3.2     arm_1.11-1        MASS_7.3-51.6    
# [5] lme4_1.1-23       Matrix_1.2-18     data.table_1.12.8 reshape2_1.4.4   
# [9] biglm_0.9-2       DBI_1.1.0        
# 
# loaded via a namespace (and not attached):
# [1] Rcpp_1.0.5       compiler_3.5.1   pillar_1.4.4     nloptr_1.2.2.1  
# [5] plyr_1.8.6       tools_3.5.1      digest_0.6.25    boot_1.3-25     
# [9] statmod_1.4.34   lifecycle_0.2.0  tibble_3.0.2     gtable_0.3.0    
# [13] nlme_3.1-148     lattice_0.20-41  pkgconfig_2.0.3  rlang_0.4.6     
# [17] rstudioapi_0.11  coda_0.19-3      withr_2.2.0      dplyr_1.0.0     
# [21] stringr_1.4.0    generics_0.0.2   vctrs_0.3.1      tidyselect_1.1.0
# [25] grid_3.5.1       glue_1.4.1       R6_2.4.1         minqa_1.2.4     
# [29] farver_2.0.3     purrr_0.3.4      magrittr_1.5     scales_1.1.1    
# [33] ellipsis_0.3.1   splines_3.5.1    abind_1.4-5      colorspace_1.4-1
# [37] labeling_0.3     stringi_1.4.6    munsell_0.5.0    crayon_1.3.4       